Do not share your Facebook API token: the per-token API rate limits would easily be exceeded (as Pedro noted).
Failure mode: other modules of Netvizz
fb_scraper
Designed and programmed by Pedro.
Apply to be a Facebook developer.
In [1]:
import sys
In [2]:
# Make the locally checked-out facebook-scraper package importable.
sys.path.append('../../../facebook-scraper')
In [3]:
import fb_scraper
import fb_scraper.prodcons
import fb_scraper.job
Store the API token from step 2 in a file called config.ini, which should contain the following two lines:
[creds]
access_token = yourTokenHere
Read the API token into Python
In [4]:
import configparser
In [5]:
# Parse config.ini and pull out the [creds] section that holds the API token.
config = configparser.ConfigParser()
config.read("config.ini")
creds = config["creds"]
Create a manager, a software object which runs the scrapes.
In [6]:
# The Manager drives the scrape; it takes the credentials as a plain dict.
mgr = fb_scraper.prodcons.Manager(dict(creds))
In [7]:
# A PageJob scrapes one Facebook page, identified by its numeric node id.
ethospagejob = fb_scraper.job.PageJob(node_id="822254694506527")
Pass it to the manager
In [8]:
# Queue the page job with the manager.
mgr.add_job(ethospagejob)
In [9]:
# Start the scrape; this runs the queued jobs.
mgr.start()
In [10]:
def get_job_filepath(job, dataset):
    """Return the path of the CSV file that *job* wrote for *dataset*.

    Parameters
    ----------
    job : a scrape job exposing ``job_id`` and a ``writers`` dict that maps
        dataset names (e.g. ``'posts'``) to writer objects with ``path`` and
        ``file_name`` attributes.
    dataset : str, name of the dataset to look up.

    Returns
    -------
    str : ``writer.path`` concatenated with ``writer.file_name``.

    Raises
    ------
    KeyError
        If *job* has no writer for *dataset*.  A raise is used instead of
        ``assert`` so the check survives ``python -O``.
    """
    if dataset not in job.writers:
        raise KeyError(
            "Job {} does not contain dataset {}".format(job.job_id, dataset))
    writer = job.writers[dataset]
    return writer.path + writer.file_name
In [11]:
import pandas as pd
%matplotlib inline
Let's see what we acquired.
In [12]:
# Load the scraped posts; created_time/updated_time are parsed as datetimes.
# skipfooter=1 skips the file's last line (presumably a footer row — TODO
# confirm against the writer's output); skipfooter requires engine="python".
ethosposts = pd.read_csv(get_job_filepath(ethospagejob, 'posts'),
parse_dates=['created_time', 'updated_time'], skipfooter=1, engine="python")
In [13]:
# (rows, columns) of the loaded posts table.
ethosposts.shape
Out[13]:
In [14]:
# Peek at three randomly chosen rows.
ethosposts.sample(3)
Out[14]:
In [15]:
# Pie chart of post types, labelled with their percentage share.
ethosposts.type.value_counts().plot.pie(autopct='%.2f', figsize=(6, 6));
Did you make it this far? Brilliant!
Compare output with Netvizz files. What columns?
In [16]:
import load_csv
from fb_gexf.fbgexf import GexfManager, CoReactionGraph, UserCoInteractionGraph
In [17]:
# Show the class's own docstring, which describes this graph type.
print(CoReactionGraph.__doc__)
In [18]:
def write_graph(job, gtype):
    """Write out a graph of type CoReactionGraph or UserCoInteractionGraph for *job*.

    Loads the job's posts, comments and reactions CSV files, builds the
    requested graph and writes it next to the job's posts file.

    Parameters
    ----------
    job : fb_scraper.job.Job, a finished scrape job.
    gtype : str, either ``'CoReactionGraph'`` or ``'UserCoInteractionGraph'``.

    Raises
    ------
    TypeError
        If *job* is not an ``fb_scraper.job.Job``.
    ValueError
        If *gtype* is not one of the two supported graph types.
        (Explicit raises instead of ``assert`` so the checks survive
        ``python -O``.)
    """
    # Dispatch table instead of an if/elif chain; also doubles as the
    # validation set for gtype.
    graph_classes = {
        'CoReactionGraph': CoReactionGraph,
        'UserCoInteractionGraph': UserCoInteractionGraph,
    }
    if not isinstance(job, fb_scraper.job.Job):
        raise TypeError("job must be an fb_scraper.job.Job, got {!r}".format(type(job)))
    if gtype not in graph_classes:
        raise ValueError("gtype must be one of {}, got {!r}".format(
            sorted(graph_classes), gtype))
    gmgr = GexfManager()
    posts = load_csv.load_posts(get_job_filepath(job, 'posts'))
    comments = load_csv.load_comments(get_job_filepath(job, 'comments'))
    reactions = load_csv.load_reactions(get_job_filepath(job, 'reactions'))
    # The graph file is written into the same directory as the posts file.
    gpath = job.writers['posts'].path
    gmgr.add_method(graph_classes[gtype](gpath))
    gmgr.load(posts, comments, reactions)
    gmgr.write()
In [19]:
# Build and write the co-reaction graph for the scraped page.
write_graph(ethospagejob, 'CoReactionGraph')
In [20]:
# Show the class's own docstring, which describes this graph type.
print(UserCoInteractionGraph.__doc__)
In [21]:
# Build and write the user co-interaction graph for the scraped page.
write_graph(ethospagejob, 'UserCoInteractionGraph')
In [34]:
# Fresh manager for your own scrape jobs below.
mgr = fb_scraper.prodcons.Manager(dict(creds))
In [ ]:
# Template cell: fill in real node ids for a page and a group of your choice,
# then queue both jobs with the manager.
myjob1 = fb_scraper.job.PageJob() # provide node_id='2931293812093' inside the parentheses
myjob2 = fb_scraper.job.GroupJob() # provide node_id='2931293812093' inside the parentheses
mgr.add_job(myjob1)
mgr.add_job(myjob2)
In [ ]:
# Start scraping your own jobs.
mgr.start()
...wait for the jobs to finish
In [ ]:
# Once the jobs have finished, write a graph for each of them.
write_graph(myjob1, 'CoReactionGraph')
write_graph(myjob2, 'UserCoInteractionGraph')